---
title: "R_1"
author: "Loreto Cox & Carmen Le Foulon"
date: "2024-01-26"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE)
library(tidyverse)
library(readxl)
library(janitor)
library(readr)

```

# Info

Replication file for "More Options, but Less Willing to Cast a Valid Vote: Evidence from Electoral Reform in Chile":  Figures 1, 4, A1, A2

Modified on 03/06/2024: the code for Figure 4 was updated so that the "Predicted values" axis starts at 0.

# Data for Figure 1, A1 & A2

```{r}
# District lookup tables derived from the comuna/district file.
base_comunas_distritos <- readRDS("data/base_comunas_distritos.rds")

# mdip: one row per district id with its number of seats (rows without a
# district id are dropped).
mdip <- base_comunas_distritos %>%
  select(distrito = dip_dist_id, mdip = dip_dist_seats) %>%
  unique() %>%
  filter(!is.na(distrito))

# distant: same as mdip plus `dip_dist_ant_id` (renamed `distant`) --
# presumably the pre-reform district id; confirm against the data dictionary.
distant <- base_comunas_distritos %>%
  select(
    distrito = dip_dist_id,
    mdip = dip_dist_seats,
    distant = dip_dist_ant_id
  ) %>%
  unique() %>%
  filter(!is.na(distrito))
```




```{r}

# Candidate-level results from the Excel file; column names are normalised
# with janitor before the election outcome is coded.
dip <- readxl::read_excel(
  "data/resultados_elecciones_diputados_ce_1989_2017.xlsx"
) %>%
  janitor::clean_names() %>%
  # elec: 0 when `electo` is missing, 1 when it is "SI", 2 for any other value.
  mutate(
    elec = case_when(
      is.na(electo) ~ 0,
      electo == "SI" ~ 1,
      TRUE ~ 2
    )
  )



# 2021 results from the pipe-delimited file.
# elec: 0 when `electo` is missing, 1 when it is "ELECTO", 2 otherwise.
# Check: table(d21$elec, d21$electo, useNA = "always")
d21 <- readr::read_delim("data/2021_11_Diputados_Votacion.txt", delim = "|") %>%
  janitor::clean_names() %>%
  mutate(
    elec = case_when(
      is.na(electo) ~ 0,
      electo == "ELECTO" ~ 1,
      TRUE ~ 2
    )
  ) %>%
  # Strip the "DISTRITO " prefix so the district becomes an integer id, then
  # add up votes within each district/list/pact/party/candidate/outcome cell.
  mutate(distrito = as.integer(gsub("DISTRITO ", "", distrito))) %>%
  group_by(
    distrito, lista, pacto, partido,
    nombres, primer_apellido, segundo_apellido, elec
  ) %>%
  summarise(votos = sum(votos, na.rm = TRUE)) %>%
  ungroup()

# Candidate-level coding for the 2021 data.
d21 <- d21 %>%
  mutate(
    # ind: 1 when `partido` starts with "INDEPENDIENTE " (gdata's trim option
    # is switched on for the comparison).
    ind = ifelse(
      gdata::startsWith(partido, "INDEPENDIENTE ", trim = TRUE), 1, 0
    ),
    # party: party label with the "INDEPENDIENTE " prefix removed for
    # independents. The two parties whose own name contains "INDEPENDIENTE"
    # are mapped explicitly so that gsub() does not mangle them.
    party = case_when(
      partido %in% c(
        "INDEPENDIENTE PARTIDO REGIONALISTA INDEPENDIENTE DEMOCRATA",
        "PARTIDO REGIONALISTA INDEPENDIENTE DEMOCRATA"
      ) ~ "PARTIDO REGIONALISTA INDEPENDIENTE DEMOCRATA",
      partido %in% c(
        "INDEPENDIENTE UNION DEMOCRATA INDEPENDIENTE",
        "UNION DEMOCRATA INDEPENDIENTE"
      ) ~ "UNION DEMOCRATA INDEPENDIENTE",
      ind == 1 ~ gsub("INDEPENDIENTE ", "", partido),
      TRUE ~ partido
    ),
    year = 2021,
    candidato_a = paste(nombres, primer_apellido, segundo_apellido, sep = " "),
    # candfp: 1 for rows on list "ZZI", 0 otherwise.
    candfp = case_when(
      lista == "ZZI" ~ 1,
      TRUE ~ 0
    ),
    # tipo_voto: null / blank ballots are identified from the `nombres` text.
    tipo_voto = case_when(
      grepl("VOTOS NULOS", nombres) ~ "nulo",
      grepl("VOTOS EN BLANCO", nombres) ~ "blanco",
      TRUE ~ "valido"
    ),
    # val: 1 for valid votes, 0 for null or blank.
    val = ifelse(tipo_voto == "valido", 1, 0)
  )


# Total votes per year / district / list / party / candidate / outcome.
dist <- dip %>%
  group_by(
    ano_de_eleccion, distrito, lista, partido,
    sigla_partido, candidato_a, elec
  ) %>%
  summarise(votos = sum(votos_totales, na.rm = TRUE)) %>%
  ungroup()

# Candidate-level coding; the columns are created in this order because
# `party` depends on `candfp` and `val` depends on `tipo_voto`.
dist <- dist %>%
  mutate(
    # candfp: 1 for rows on the year-specific list code matched below,
    # 0 otherwise.
    candfp = case_when(
      ano_de_eleccion >= 1989 & ano_de_eleccion <= 2001 & lista == "CI" ~ 1,
      ano_de_eleccion >= 2005 & ano_de_eleccion <= 2009 & lista == "G" ~ 1,
      ano_de_eleccion == 2013 & lista == "L" ~ 1,
      ano_de_eleccion == 2017 & lista == "R" ~ 1,
      ano_de_eleccion == 2021 & lista == "ZZI" ~ 1,
      TRUE ~ 0
    ),
    # party: party label with the "INDEPENDIENTE " prefix removed; parties
    # whose own name contains "INDEPENDIENTE" are mapped explicitly.
    party = case_when(
      partido %in% c(
        "INDEPENDIENTE REGIONALISTA INDEPENDIENTE",
        "REGIONALISTA INDEPENDIENTE"
      ) ~ "REGIONALISTA INDEPENDIENTE",
      partido %in% c(
        "INDEPENDIENTE UNION DEMOCRATA INDEPENDIENTE",
        "UNION DEMOCRATA INDEPENDIENTE"
      ) ~ "UNION DEMOCRATA INDEPENDIENTE",
      sigla_partido == "IND" & candfp == 0 ~ gsub("INDEPENDIENTE ", "", partido),
      TRUE ~ partido
    ),
    # ind: the acronym checked is "INDEP" before 2017 and "IND" in 2017.
    ind = ifelse(
      (sigla_partido == "INDEP" & ano_de_eleccion < 2017) |
        (sigla_partido == "IND" & ano_de_eleccion == 2017),
      1, 0
    ),
    # tipo_voto: null / blank ballots are identified from `candidato_a`.
    tipo_voto = case_when(
      grepl("VOTOS NULOS", candidato_a) ~ "nulo",
      grepl("VOTOS EN BLANCO", candidato_a) ~ "blanco",
      TRUE ~ "valido"
    ),
    # val: 1 for valid votes, 0 for null or blank.
    val = ifelse(tipo_voto == "valido", 1, 0)
  )

## Identifiers used to compute effective numbers:
# party_1 (effective number of parties): every independent candidate, whether
#   running inside or outside a list, counts as a distinct party.
# list_1 (effective number of lists): every independent candidate running
#   outside a list counts as a distinct list.

dist <- dist %>%
  mutate(
    party_1 = case_when(
      candfp == 1 | ind == 1 ~ candidato_a,
      TRUE ~ partido
    ),
    list_1 = case_when(
      candfp == 1 ~ candidato_a,
      TRUE ~ lista
    )
  )

# Same identifiers for 2021; year, candidate name and candfp are (re)set here
# before party_1 / list_1 are derived from them.
d21 <- d21 %>%
  mutate(
    year = 2021,
    candidato_a = paste(nombres, primer_apellido, segundo_apellido, sep = " "),
    candfp = case_when(
      lista == "ZZI" ~ 1,
      TRUE ~ 0
    ),
    party_1 = case_when(
      candfp == 1 | ind == 1 ~ candidato_a,
      TRUE ~ partido
    ),
    list_1 = case_when(
      candfp == 1 ~ candidato_a,
      TRUE ~ lista
    )
  )



# Keep the same columns, in the same order, from both sources.
dd <- dist %>%
  select(
    year = ano_de_eleccion, distrito, lista, candidato_a, candfp, ind,
    tipo_voto, val, party, partido, party_1, list_1, elec, votos
  )

dd21 <- d21 %>%
  select(
    year, distrito, lista, candidato_a, candfp, ind,
    tipo_voto, val, party, partido, party_1, list_1, elec, votos
  )

## Final dataset: one row per candidate per district, carrying party_1 and
## list_1 for the effective number of parties and lists.
dd <- bind_rows(dd, dd21)

```


# Figure 1

## Intermediate datasets
```{r}

# Candidate rows for each election, dropping rows that have no list.
d1 <- dd %>% filter(year == 2013 & !is.na(lista))

d2 <- dd %>% filter(year == 2017 & !is.na(lista))


## 2013
# Number of distinct lists per district (rows with candfp == 1 excluded).
nlista13 <- d1 %>%
  filter(candfp == 0) %>%
  count(distrito, lista) %>%
  count(distrito) %>%
  select(distrito, nlista13 = n)

# Number of candidate rows per district.
tcand13 <- d1 %>%
  count(distrito) %>%
  select(distrito, ncand13 = n)

# Number of distinct parties per district (candfp == 0 and ind == 0 only).
party13 <- d1 %>%
  filter(candfp == 0 & ind == 0) %>%
  count(distrito, party_1) %>%
  count(distrito) %>%
  select(distrito, nparty13 = n)

# 2013 rows are keyed by the `distant` column of the lookup table, so the
# district id is renamed before joining on it. Join keys are explicit, and
# party13 is joined once (it was previously joined twice, which was a no-op).
tot13 <- nlista13 %>%
  full_join(tcand13, by = "distrito") %>%
  full_join(party13, by = "distrito") %>%
  select(distant = distrito, everything()) %>%
  full_join(distant, by = "distant")

## 2017

nlista17 <- d2 %>%
  filter(candfp == 0) %>%
  count(distrito, lista) %>%
  count(distrito) %>%
  select(distrito, nlista17 = n)

tcand17 <- d2 %>%
  count(distrito) %>%
  select(distrito, ncand17 = n)

party17 <- d2 %>%
  filter(candfp == 0 & ind == 0) %>%
  count(distrito, party_1) %>%
  count(distrito) %>%
  select(distrito, nparty17 = n)

# Alternative count based on the cleaned `party` label (independents are not
# filtered out); it is not used by any later chunk in this file.
party17_party <- d2 %>%
  filter(candfp == 0) %>%
  count(distrito, party) %>%
  count(distrito) %>%
  select(distrito, nparty17_party = n)

# 2017 rows are joined to the seat counts by district id.
tot17 <- nlista17 %>%
  full_join(tcand17, by = "distrito") %>%
  full_join(party17, by = "distrito") %>%
  full_join(mdip, by = "distrito")
```

```{r}

## Summary tables by district magnitude (`mdip`)

# Summarise one column of `data` within each value of `mdip`.
#   data       data frame containing `mdip` and the column to summarise
#   value      unquoted name of the column to summarise
#   year_label string stored in the `Year` column
#   tipo_label string stored in the `tipo` column
# Returns one row per `mdip` with mean, min, max, sd, n, Year and tipo.
summarise_by_mdip <- function(data, value, year_label, tipo_label) {
  data %>%
    group_by(mdip) %>%
    summarise(
      mean = mean({{ value }}),
      min = min({{ value }}),
      max = max({{ value }}),
      sd = sd({{ value }}),
      n = n()
    ) %>%
    mutate(Year = year_label, tipo = tipo_label)
}

## Lists per district magnitude

nl17 <- summarise_by_mdip(tot17, nlista17, "2017", "n_listas")
nl13 <- summarise_by_mdip(tot13, nlista13, "2013", "n_listas")

res_nl <- bind_rows(nl13, nl17)


## Number of parties per district magnitude

np17 <- summarise_by_mdip(tot17, nparty17, "2017", "n_party")
np13 <- summarise_by_mdip(tot13, nparty13, "2013", "n_party")

res_np <- bind_rows(np13, np17)


## Mean candidates per district magnitude
# (`sd` was previously passed twice to summarise(); it is computed once here.)

nc17 <- summarise_by_mdip(tot17, ncand17, "2017", "n_cand")
nc13 <- summarise_by_mdip(tot13, ncand13, "2013", "n_cand")

res_nc <- bind_rows(nc13, nc17)


## Mean candidates per seat per district magnitude
# These tables were previously labelled tipo = "n_cand", the same label as
# the candidate-count tables above; they now carry their own label.

# 2017: candidates divided by the district's seat count.
ncp17 <- tot17 %>%
  mutate(cand_perseat17 = ncand17 / mdip) %>%
  summarise_by_mdip(cand_perseat17, "2017", "n_cand_perseat")

# 2013: candidates divided by a fixed 2 (presumably two seats per pre-reform
# district -- confirm), while still grouped by the `mdip` joined from the
# lookup table.
ncp13 <- tot13 %>%
  mutate(cand_perseat13 = ncand13 / 2) %>%
  summarise_by_mdip(cand_perseat13, "2013", "n_cand_perseat")

res_ncp <- bind_rows(ncp13, ncp17)


```

## Figure 1
```{r}
 stitle <- 9
saxis <- 9
sat <- 10
sleg <- 11

# Build one Figure 1 panel: mean (point) and min-max range (error bar) of a
# summary table by district magnitude, one colour/shape per year.
#   data       summary table with columns mdip, mean, min, max and Year
#   y_max      upper limit of the y axis (the lower limit is 0)
#   plot_title panel title
#   y_label    y-axis label
# Text sizes come from stitle / saxis / sat / sleg defined above.
plot_mdip_summary <- function(data, y_max, plot_title, y_label) {
  ggplot(data) +
    geom_point(aes(x = mdip, y = mean, colour = Year, shape = Year), size = 3) +
    geom_errorbar(
      aes(x = mdip, ymin = min, ymax = max, colour = Year),
      linewidth = 0.8, width = 0.05
    ) +
    scale_y_continuous(limits = c(0, y_max), expand = c(0, 0)) +
    theme_bw() +
    scale_colour_manual(
      name = "", labels = c("2013", "2017"),
      values = c("darkgoldenrod1", "darkblue")
    ) +
    scale_shape_manual(
      name = "", labels = c("2013", "2017"),
      values = c(19, 17)
    ) +
    ggtitle(plot_title) +
    xlab("District Magnitude") +
    ylab(y_label) +
    theme(
      axis.text = element_text(size = saxis),
      axis.title = element_text(size = sat),
      title = element_text(size = stitle),
      legend.text = element_text(size = sleg)
    )
}

glist <- plot_mdip_summary(
  res_nl, 10,
  "Mean number of lists per district", "Number of lists"
)

gparty <- plot_mdip_summary(
  res_np, 25,
  "Mean number of parties per district", "Number of parties"
)

gcand <- plot_mdip_summary(
  res_nc, 60,
  "Mean number of candidates per district", "Number of candidates"
)

gcand_perseat <- plot_mdip_summary(
  res_ncp, 10,
  "Mean number of candidates per seat per district",
  "Number of candidates per seat"
)

# 3 x 3 grid: the NULL cells form a narrow spacer column and spacer row
# between the four panels; a single shared legend is placed at the bottom.
gall <- ggpubr::ggarrange(
  gcand, NULL, gcand_perseat,
  NULL, NULL, NULL,
  gparty, NULL, glist,
  nrow = 3, ncol = 3, widths = c(1, 0.09, 1), heights = c(1, 0.1, 1),
  common.legend = TRUE, legend = "bottom"
)

# Make sure the output directory exists so ggsave() does not fail on a
# fresh checkout.
dir.create("figures", showWarnings = FALSE, recursive = TRUE)
ggsave("figures/figure_1.pdf", gall, width = 8, height = 5.5)
```






# Figures A1 & A2: Effective number of parties and lists in the Lower House


## Figure A1: Effective and real number of parties: competing and winning

```{r}

# Total valid votes per year.
val_year <- dd %>%
  filter(val == 1) %>%
  group_by(year) %>%
  summarise(val_tot = sum(votos)) %>%
  ungroup()

# Number of rows with elec == 1 per year.
n_year <- dd %>%
  filter(elec == 1) %>%
  group_by(year) %>%
  summarise(n_tot = n()) %>%
  ungroup()

# Effective number of parties by votes: 1 / sum of squared vote shares of
# each party_1 within a year.
eff <- dd %>%
  filter(val == 1) %>%
  group_by(year, party_1) %>%
  summarise(votos = sum(votos), .groups = "drop") %>%
  full_join(val_year, by = "year") %>%
  mutate(per1 = votos / val_tot, p1 = per1 * per1) %>%
  group_by(year) %>%
  summarise(sum_party1 = sum(p1)) %>%
  ungroup() %>%
  mutate(eff_part = 1 / sum_party1) %>%
  select(year, eff_part)

# Same index computed on shares of elec == 1 rows instead of vote shares.
eff_elec <- dd %>%
  filter(val == 1 & elec == 1) %>%
  group_by(year, party_1) %>%
  summarise(n = n(), .groups = "drop") %>%
  full_join(n_year, by = "year") %>%
  mutate(per1 = n / n_tot, p1 = per1 * per1) %>%
  group_by(year) %>%
  summarise(sum_party1 = sum(p1)) %>%
  ungroup() %>%
  mutate(eff_elec = 1 / sum_party1) %>%
  select(year, eff_elec)

# Raw counts of distinct party_1 values per year, restricted to rows with
# ind == 0 and candfp == 0: all such rows, and those with elec == 1.
nreal <- dd %>%
  filter(ind == 0 & candfp == 0) %>%
  count(year, party_1) %>%
  count(year) %>%
  select(year, n_parties = n)

nreal_elec <- dd %>%
  filter(ind == 0 & candfp == 0 & elec == 1) %>%
  count(year, party_1) %>%
  count(year) %>%
  select(year, n_elec = n)

eff_parties <- eff %>%
  full_join(eff_elec, by = "year") %>%
  full_join(nreal, by = "year") %>%
  full_join(nreal_elec, by = "year")

party <- eff_parties %>%
  pivot_longer(!year, names_to = "tipo", values_to = "eff")

# Legend text and point shape for each series, keyed by the `tipo` value so
# the mapping no longer depends on the alphabetical order of the series names.
party_series <- c(
  eff_elec = "Effective number of winning parties",
  eff_part = "Effective number of parties",
  n_elec = "Number of winning parties",
  n_parties = "Number of parties"
)
party_shapes <- c(eff_elec = 19, eff_part = 17, n_elec = 1, n_parties = 3)

a1 <- ggplot(party) +
  geom_line(aes(x = year, y = eff, linetype = tipo)) +
  scale_x_continuous(
    breaks = seq(1989, 2021, by = 4), expand = c(0, 0), minor_breaks = NULL
  ) +
  scale_y_continuous(
    breaks = seq(0, 30, by = 5), minor_breaks = NULL,
    limits = c(0, 30), expand = c(0, 0)
  ) +
  labs(x = "", y = "") +
  geom_point(aes(x = year, y = eff, shape = tipo), size = 2) +
  # Both scales share identical breaks and labels so their legends merge.
  scale_linetype_discrete(
    breaks = names(party_series), labels = unname(party_series)
  ) +
  scale_shape_manual(
    breaks = names(party_series), labels = unname(party_series),
    values = party_shapes
  ) +
  theme_bw() +
  guides(
    linetype = guide_legend(nrow = 2),
    shape = guide_legend(nrow = 2)
  ) +
  theme(legend.position = "bottom", legend.title = element_blank())

# Make sure the output directory exists so ggsave() does not fail on a
# fresh checkout.
dir.create("figures", showWarnings = FALSE, recursive = TRUE)
ggsave("figures/fig_a1.pdf", a1, width = 7, height = 4.5)

```

## Figure A2: Effective and real number of lists: competing and winning

```{r}

# Total valid votes per year.
val_year <- dd %>%
  filter(val == 1) %>%
  group_by(year) %>%
  summarise(val_tot = sum(votos)) %>%
  ungroup()

# Number of rows with elec == 1 per year.
n_year <- dd %>%
  filter(elec == 1) %>%
  group_by(year) %>%
  summarise(n_tot = n()) %>%
  ungroup()

# Effective number of lists by votes: 1 / sum of squared vote shares of each
# list_1 within a year.
eff_l <- dd %>%
  filter(val == 1) %>%
  group_by(year, list_1) %>%
  summarise(votos = sum(votos), .groups = "drop") %>%
  full_join(val_year, by = "year") %>%
  mutate(per1 = votos / val_tot, p1 = per1 * per1) %>%
  group_by(year) %>%
  summarise(sum_party1 = sum(p1)) %>%
  ungroup() %>%
  mutate(eff_lista = 1 / sum_party1) %>%
  select(year, eff_lista)

# Same index computed on shares of elec == 1 rows instead of vote shares.
eff_elec_l <- dd %>%
  filter(val == 1 & elec == 1) %>%
  group_by(year, list_1) %>%
  summarise(n = n(), .groups = "drop") %>%
  full_join(n_year, by = "year") %>%
  mutate(per1 = n / n_tot, p1 = per1 * per1) %>%
  group_by(year) %>%
  summarise(sum_party1 = sum(p1)) %>%
  ungroup() %>%
  mutate(eff_lista_elec = 1 / sum_party1) %>%
  select(year, eff_lista_elec)

# Raw counts of distinct list_1 values per year among rows with candfp == 0:
# all such rows, and those with elec == 1.
nreal_l <- dd %>%
  filter(candfp == 0) %>%
  count(year, list_1) %>%
  count(year) %>%
  select(year, n_lists = n)

nreal_elec_l <- dd %>%
  filter(elec == 1 & candfp == 0) %>%
  count(year, list_1) %>%
  count(year) %>%
  select(year, n_win_lists = n)

eff_lists <- eff_l %>%
  full_join(eff_elec_l, by = "year") %>%
  full_join(nreal_l, by = "year") %>%
  full_join(nreal_elec_l, by = "year")

lists <- eff_lists %>%
  pivot_longer(!year, names_to = "tipo", values_to = "eff")

# Legend text and point shape for each series, keyed by the `tipo` value so
# the mapping no longer depends on the alphabetical order of the series names.
list_series <- c(
  eff_lista = "Effective number of lists",
  eff_lista_elec = "Effective number of winning lists",
  n_lists = "Number of lists",
  n_win_lists = "Number of winning lists"
)
list_shapes <- c(
  eff_lista = 19, eff_lista_elec = 17, n_lists = 1, n_win_lists = 3
)

a2 <- ggplot(lists) +
  geom_line(aes(x = year, y = eff, linetype = tipo)) +
  scale_x_continuous(
    breaks = seq(1989, 2021, by = 4), expand = c(0, 0), minor_breaks = NULL
  ) +
  scale_y_continuous(
    breaks = seq(0, 15, by = 5), minor_breaks = NULL,
    limits = c(0, 15), expand = c(0, 0)
  ) +
  labs(x = "", y = "") +
  geom_point(aes(x = year, y = eff, shape = tipo), size = 2) +
  # Both scales share identical breaks and labels so their legends merge.
  scale_linetype_discrete(
    breaks = names(list_series), labels = unname(list_series)
  ) +
  scale_shape_manual(
    breaks = names(list_series), labels = unname(list_series),
    values = list_shapes
  ) +
  theme_bw() +
  guides(
    linetype = guide_legend(nrow = 2),
    shape = guide_legend(nrow = 2)
  ) +
  theme(legend.position = "bottom", legend.title = element_blank())

# Make sure the output directory exists so ggsave() does not fail on a
# fresh checkout.
dir.create("figures", showWarnings = FALSE, recursive = TRUE)
ggsave("figures/fig_a2.pdf", a2, width = 7, height = 4.5)

```



# Figure 4: Predicted values of invalid voting for House elections in 2013 (pre-reform) and 2017 (post-reform)

Dataset created in Do file "R_2".


```{r}
# Point estimates with intervals (ci_low to ci_high), one colour and shape
# per year. The plot is printed into the rendered document, not saved to disk.
pred_plot <- readxl::read_excel("data/pred_plot.xlsx")

ggplot(pred_plot) +
  geom_pointrange(
    aes(
      x = change, y = pred,
      ymin = ci_low, ymax = ci_high,
      shape = as.factor(year),
      colour = as.factor(year)
    )
  ) +
  scale_x_continuous(
    breaks = seq(1, 6, by = 1),
    minor_breaks = NULL,
    limits = c(1, 6)
  ) +
  # The y axis starts at 0 with no expansion padding.
  scale_y_continuous(
    breaks = seq(0, .14, by = .02),
    minor_breaks = NULL,
    limits = c(0, .13),
    expand = c(0, 0)
  ) +
  labs(x = "Change in district magnitude", y = "Predicted values") +
  scale_colour_manual(values = c("darkgrey", "black")) +
  theme_bw() +
  theme(legend.position = "bottom", legend.title = element_blank())


```

